## -----------------------------------------------------------------------------
##  Title:   Script to reproduce analysis reported in
##           Transforming Stability into Change: 
##              How the Media Select and Report Opinion Polls 
##  Date:    September 2019
##  Journal: International Journal of Press/Politics
##
## -----------------------------------------------------------------------------
## Preliminary notes:
## -----------------------------------------------------------------------------
## 1) readme.txt contains brief variable descriptions (see paper and SI)
## 2) Script focuses on main analysis and only parts of the SI
##    - raw media documents cannot be shared publicly, hence text analysis part
##      is not included in the script
##    - variables created based on that analysis are in the data

# load packages
library("dplyr")
library("lme4")
library("arm")
library("merTools")
library("texreg")
library("ggplot2")
library("gridExtra")

## helper function
## mean center and divide by 2 standard deviation
two_sd <- function(x) {
  return((x - mean(x, na.rm = TRUE))/(2*sd(x, na.rm = TRUE)))
}

## load data for single level analysis and for hierarchical models
polls <- read.csv("polls.csv", stringsAsFactors = FALSE,
                  header = TRUE)
poll_hierarchical <- read.csv("polls-hierarchical.csv", stringsAsFactors = FALSE,
                               header = TRUE)


## 0. Descriptives (reported in paper)
p11 <- ggplot(polls, aes(x = volat_own_prev)) +
  geom_histogram(fill = "grey80", colour = "black") +
  theme_minimal(base_size = 10) +
  theme(panel.background = element_rect(fill = "grey98", colour = NA)) +
  theme(strip.text = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.major.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.spacing = unit(1.5, "lines"),
        title = element_text(size = 12, face = "bold")) +
  theme(plot.margin = margin(1, 1, 1, 1, "cm"))  +
  labs(
    x = "",
    y = "",
    title    = "Distribution of volatility (479 polls)",
    caption  = "",
    subtitle  = "Mean: 3.36 SD: 1.58")

polls$poll_d <- as.Date(polls$poll_date)
p12 <- ggplot(polls, aes(x = poll_d, y = volat_own_prev)) +
  geom_ribbon(aes(ymin = 0, ymax = volat_own_prev), fill = "grey75", alpha = 0.75) +
  geom_point(data = filter(polls, volat_own_prev > 10), aes(x = poll_d, y = volat_own_prev)) +
  geom_text(data = filter(polls, volat_own_prev > 10), aes(x = poll_d, y = volat_own_prev,
                                                           label = paste0(poll, "\n(", days, " days)")),
            vjust = 1.5) +
  geom_smooth(method = "loess", se = FALSE, color = "grey20") +
  theme_minimal(base_size = 10) +
  theme(panel.background = element_rect(fill = "grey98", colour = NA)) +
  theme(strip.text = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.major.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.spacing = unit(1.5, "lines"),
        title = element_text(size = 12, face = "bold")) +
  theme(plot.margin = margin(1, 1, 1, 1, "cm"))  +
  scale_y_continuous("", position = "right") + 
  scale_x_date("") +
  labs(title = "Volatility throughout time")

# combine:
fig_desc_1 <- grid.arrange(p11, p12, ncol = 2)


p21 <- ggplot(polls, aes(x = mentions_main)) +
  geom_histogram(fill = "grey80", colour = "black") +   
  theme_minimal(base_size = 10) +
  theme(panel.background = element_rect(fill = "grey98", colour = NA)) +
  theme(strip.text = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.major.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.spacing = unit(1.5, "lines"),
        title = element_text(size = 12, face = "bold")) +
  theme(plot.margin = margin(1, 1, 1, 1, "cm"))  +
  labs(
    x = "",
    y = "",
    title    = "Distribution of mentions",
    caption  = "",
    subtitle  = "Mean: 9 SD: 11")

p22 <- ggplot(polls, aes(x = poll_d, y = mentions_main)) +
  geom_ribbon(aes(ymin = 0, ymax = mentions_main), fill = "grey75", alpha = 0.75) +
  geom_point(data = filter(polls, mentions_main > 50), aes(x = poll_d, y = mentions_main)) +
  geom_text(data = filter(polls, mentions_main > 50), aes(x = poll_d, y = mentions_main,
                                                          label = paste0(poll, "\n(", days, " days)")),
            vjust = 1.5) +
  geom_smooth(method = "loess", se = FALSE, color = "grey20") +
  theme_minimal(base_size = 10) +
  theme(panel.background = element_rect(fill = "grey98", colour = NA)) +
  theme(strip.text = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.major.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.spacing = unit(1.5, "lines"),
        title = element_text(size = 12, face = "bold")) +
  theme(plot.margin = margin(1, 1, 1, 1, "cm"))  +
  scale_y_continuous("", position = "right") + scale_x_date("") +
  labs(title = "Selection throughout time")

fig_desc_2 <- grid.arrange(p21, p22, ncol = 2)

# Descriptive table in Appendix 1 (needs editing)
desc_tab <- polls %>% group_by(pollingfirm) %>%
  summarise(n = n(),
            min_date = min(poll_date),
            max_date = max(poll_date),
            total = round(mean(mentions_main), 0),
            volat = round(mean(volat_own_prev, na.rm = TRUE), 3),
            msig     = round(mean(n_significant, na.rm = TRUE), 2),
            mdays    = round(mean(days, na.rm = TRUE), 0))
desc_tab <- sapply(desc_tab, as.character)

desc_tab <- rbind(c("Firm", "Polls", "Earliest", "Latest", "Average mentions",
                    "Average volatility", "Average sign.", "Average days between"),
                  desc_tab)

## 1. Poll level models
# main text models
main_nb <- glm.nb(mentions_main ~ 1 + volat_own_prev_sd + 
                    any_significant + days_sd + e_proximate + factor(year),
                  data = polls)
summary(main_nb)

# Fit single level robustness checks
#   - without outliers
#   - only non-significant
#   - max_change

nb_subset <- glm.nb(mentions_main ~ 1 + volat_own_prev_sd + any_significant + days_sd + e_proximate + factor(year),
                    data = filter(polls, mentions_main <= 50 & volat_own_prev < 10))
summary(nb_subset)

nb_ns <- glm.nb(mentions_main ~ 1 + volat_own_prev_sd + days_sd + e_proximate + factor(year),
                data = filter(polls, any_significant == 0))
summary(nb_ns)

max_nb <- glm.nb(mentions_main ~ 1 + max_change_sd + any_significant + days_sd + e_proximate + factor(year),
                 data = polls)
summary(max_nb)

# Formatted table for robustness checks (in appendix)
screenreg(list(nb_subset, nb_ns, max_nb),
          custom.model.names = c("Extreme values excluded", "Not-significant only", "Maximum change"),
          center = TRUE,
          dcolumn = TRUE,
          use.packages = FALSE,
          caption.above = TRUE,
          float.pos = "!h",
          custom.coef.names = c("Intercept", "Change (2 SD)",
                                "Any significant change (= 1)",
                                "Days since last poll (2 SD)",
                                "Election campaign (= 1)",
                                "2012", "2013", "2014", "2015",
                                "Change (2 SD)"),
          include.deviance = FALSE,
          include.bic = FALSE,
          custom.gof.names = c("AIC", "LogLik", "N"),
          caption = "Alternative models: negative binomial models of overall article count")

## 2. Hierarchical models
## within-dyad centering of predictors
poll_hierarchical <- poll_hierarchical %>% 
  group_by(dyad_id) %>%
  mutate(volat_own_prev_sd = two_sd(volat_own_prev),
         max_change_sd     = two_sd(max_change),
         days_sd           = two_sd(days))

# Main model (in text)
nb_hierarchical <- glmer.nb(mentions ~ 1 +
                              volat_own_prev_sd + any_significant + days_sd + e_proximate +
                              partners + factor(year) +
                              (1 | dyad_id),
                            control = glmerControl(optimizer = "bobyqa",
                                                   optCtrl   = list(maxfun = 2e8)),
                            data = poll_hierarchical)
summary(nb_hierarchical)

# For appendix
nbhlm_subset <- glmer.nb(mentions ~ 1 + volat_own_prev_sd + any_significant + 
                           days_sd + e_proximate + factor(year) +
                           (1 | dyad_id),
                         control = glmerControl(optimizer = "bobyqa",
                                                optCtrl   = list(maxfun = 2e8)),
                         data = filter(poll_hierarchical, volat_own_prev < 10))
summary(nbhlm_subset)

nbhlm_ns <- glmer.nb(mentions ~ 1 + volat_own_prev_sd + 
                       days_sd + e_proximate + factor(year) +
                       (1 | dyad_id),
                     control = glmerControl(optimizer = "bobyqa",
                                            optCtrl   = list(maxfun = 2e8)),
                     data = filter(poll_hierarchical, any_significant == 0))
summary(nbhlm_ns)

max_nbhlm <- glmer.nb(mentions ~ 1 + max_change_sd + any_significant + 
                        days_sd + e_proximate + factor(year) +
                        (1 | dyad_id),
                      control = glmerControl(optimizer = "bobyqa",
                                             optCtrl   = list(maxfun = 2e8)),
                      data = poll_hierarchical)
summary(max_nbhlm)

# Create formatted table
screenreg(list(nbhlm_subset, nbhlm_ns, max_nbhlm),
          custom.model.names = c("Extreme values excluded", "Not-significant only", "Maximum change"),
          center = TRUE,
          dcolumn = TRUE,
          use.packages = FALSE,
          caption.above = TRUE,
          float.pos = "!h",
          custom.coef.names = c("Intercept", "Change (2 SD)",
                                "Any significant change (= 1)",
                                "Days since last poll (2 SD)",
                                "Election campaign (= 1)",
                                "2012", "2013", "2014", "2015",
                                "Change (2 SD)"),
          include.deviance = FALSE,
          include.bic = FALSE,
          custom.gof.names = c("AIC", "LogLik", "N", "Dyads", "Var (Intercept)"),
          caption = "Alternative models: hierarchical negative binomial models of overall article count")

## Media count models (appendix and referenced in text): poll level media heterogeneity
outlets <- poll_hierarchical %>%
  ungroup() %>%
  group_by(poll) %>%
  summarise(media = sum(mentions > 0),
            days  = unique(days),
            e_proximate = unique(e_proximate),
            volat_own_prev = unique(volat_own_prev),
            any_significant = unique(any_significant),
            year = unique(year))

outlets <- outlets %>%
  mutate(volat_own_prev_sd = two_sd(volat_own_prev),
         days_sd           = two_sd(days))

outlets_model <- glm.nb(media ~ 1 + volat_own_prev_sd + any_significant +
                          days_sd + e_proximate + factor(year),
                        data = outlets)
summary(outlets_model)

reported_only <- glm.nb(media ~ 1 + volat_own_prev_sd + any_significant + 
                          days_sd + e_proximate + factor(year),
                        data = filter(outlets, media > 0))
summary(reported_only)

screenreg(list(outlets_model, reported_only),
          custom.model.names = c("Outlet count", "Outlet count (one or more reports)"),
          center = TRUE,
          dcolumn = TRUE,
          use.packages = FALSE,
          caption.above = TRUE,
          float.pos = "!h",
          custom.coef.names = c("Intercept", "Change (2 SD)",
                                "Any significant change (= 1)",
                                "Days since last poll (2 SD)",
                                "Election campaign (=1)",
                                "2012", "2013", "2014", "2015"),
          include.deviance = FALSE,
          include.bic = FALSE,
          custom.gof.names = c("AIC", "LogLik", "N"),
          caption = "Model results: negative binomial model of outlet count")

# Main results summary table
screenreg(list(main_nb, nb_hierarchical),
          center = TRUE,
          dcolumn = TRUE,
          use.packages = FALSE,
          caption.above = TRUE,
          float.pos = "!h",
          omit.coef = "year",
          custom.coef.names = c("Intercept", "Volatility (2 SD)",
                                "Any significant change (= 1)",
                                "Days since last poll (2 SD)",
                                "Election campaign (= 1)",
                                "Partner (= 1)"),
          include.deviance = FALSE,
          include.bic = FALSE,
          custom.gof.names = c("AIC", "LogLik", "N", "Dyads", "Var(Intercept)"),
          caption = "Models of selection")

# Create summary plots - Main results figure
pred_data <- data.frame(volat_own_prev_sd = seq(min(polls$volat_own_prev_sd, na.rm = TRUE),
                                                max(polls$volat_own_prev_sd, na.rm = TRUE),
                                                0.1),
                        any_significant = 0,
                        e_proximate     = 0,
                        days_sd         = 0,
                        year            = 2013
)

pnb <- predict(main_nb,
               newdata = pred_data,
               se = TRUE)

pnb <- data.frame(
  volat_own_prev_sd    = pred_data$volat_own_prev_sd,
  est       = exp(pnb$fit),
  lower     = exp(pnb$fit - 1.96*pnb$se.fit),
  upper     = exp(pnb$fit + 1.96*pnb$se.fit))
polls$dev_high <- 0
polls$dev_high[polls$volat_own_prev > 10] <- 1
polls$dev_high[polls$mentions_main > 50] <- 1

p1 <- ggplot(polls,
             aes(x = volat_own_prev_sd, y = mentions_main)) +
  geom_ribbon(data = filter(pnb, volat_own_prev_sd >= -0.5 & volat_own_prev_sd <= 0.5),
              aes(y = est, ymax = upper, ymin = lower), alpha = 0.5, fill = "darkred") +
  geom_jitter(aes(colour = factor(dev_high)), alpha = 0.5, width = 0.025, height = 0.025)  +
  scale_color_manual("", values = c("grey70", "black"), guide = FALSE) +
  geom_line(data = pnb, aes(y = est), size = 0.85) +
  geom_line(data = pnb, aes(y = lower), linetype = 2, size = 0.5) +
  geom_line(data = pnb, aes(y = upper), linetype = 2, size = 0.5) +
  scale_x_continuous(breaks = c(quantile(polls$volat_own_prev_sd, 0, na.rm = TRUE),
                                quantile(polls$volat_own_prev_sd, .25, na.rm = TRUE),
                                quantile(polls$volat_own_prev_sd, .5, na.rm = TRUE),
                                quantile(polls$volat_own_prev_sd, .75, na.rm = TRUE),
                                quantile(polls$volat_own_prev_sd, 1, na.rm = TRUE)),
                     
                     labels = c(round(quantile(polls$volat_own_prev, 0, na.rm = TRUE), 2),
                                round(quantile(polls$volat_own_prev, .25, na.rm = TRUE), 2),
                                round(quantile(polls$volat_own_prev, .5, na.rm = TRUE), 2),
                                round(quantile(polls$volat_own_prev, .75, na.rm = TRUE), 2),
                                round(quantile(polls$volat_own_prev, 1, na.rm = TRUE), 2)),
                     sec.axis = sec_axis(~.*1, breaks = c(
                       quantile(polls$volat_own_prev_sd, .25, na.rm = TRUE),
                       quantile(polls$volat_own_prev_sd, .5, na.rm = TRUE),
                       quantile(polls$volat_own_prev_sd, .75, na.rm = TRUE)))) +
  theme_minimal(base_size = 10) +
  theme(panel.background = element_rect(fill = "grey98", colour = NA)) +
  theme(strip.text = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        axis.title.x = element_text(size = 12, hjust = 0.5),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.major.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.spacing = unit(1.5, "lines"),
        title = element_text(size = 12, face = "bold")) +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  labs(
    x = "Volatility",
    y = "Article count",
    caption  = "",
    title  = "Selection as a function of change")

polls$dev_high <- NULL

pred_data <- data.frame(volat_own_prev_sd = seq(-0.5,
                                                0.5,
                                                0.05),
                        any_significant = 0,
                        e_proximate     = 0,
                        days_sd         = 0,
                        year            = 2013
)

pnb <- predict(main_nb,
               newdata = pred_data,
               se = TRUE)

pnb <- data.frame(
  volat_own_prev_sd    = pred_data$volat_own_prev_sd,
  est       = exp(pnb$fit),
  lower     = exp(pnb$fit - 1.96*pnb$se.fit),
  upper     = exp(pnb$fit + 1.96*pnb$se.fit))

p2 <- ggplot(pnb, aes(x = volat_own_prev_sd, y = est, ymin = lower, ymax = upper)) +
  geom_jitter(data = filter(polls, mentions_main < 13 & volat_own_prev_sd <= 0.5 & volat_own_prev_sd >= -0.5),
              aes(x = volat_own_prev_sd, y = mentions_main),
              alpha = 0.5, colour = "grey70", width = 0.025, height = 0.025,
              inherit.aes = FALSE)  +
  geom_ribbon(alpha = 0.5, fill = "grey80", linetype = 2) + geom_line(colour = "black", size = 1.5) +
  scale_x_continuous("", limits = c(-0.5, 0.5),
                     breaks = c(-0.5, 0, 0.5),
                     labels = c("One SD below\nmean volatility",
                                "Average\nvolatility", "One SD above\nmean volatility"),
                     sec.axis = sec_axis(~.*1, breaks = c(-0.5, 0, 0.5),
                                         labels = c("1.78", "(350 polls)", "4.94"))) +
  scale_y_continuous("", breaks = c(0, 5, 10, 12),
                     limits = c(0, 13), position = "right") +
  theme_minimal(base_size = 10) +
  theme(panel.background = element_rect(fill = "grey98", colour = NA)) +
  theme(strip.text = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.major.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.spacing = unit(1.5, "lines"),
        title = element_text(size = 12, face = "bold")) +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  labs(title = "Zoomed in for -/+ 1 SD")

# combine plots
fig_results <- grid.arrange(p1, p2, ncol = 2)

## Text features
art <- read.csv("polls-content.csv", stringsAsFactors = FALSE,
                header = TRUE)

art_title <- glmer(is_title_change ~  1 + volat_own_prev_sd + any_significant +
                     days_sd + e_proximate + partners + 
                     factor(year) +
                     (1 | poll), 
                   control = glmerControl(optimizer = "bobyqa",
                                          optCtrl   = list(maxfun = 2e8)),
                   data = art, family = binomial)
artmax_title <- glmer(is_title_change ~  1 + max_change_sd + any_significant + 
                        days_sd + e_proximate + partners + 
                        factor(year) + (1 | poll), 
                      control = glmerControl(optimizer = "bobyqa",
                                             optCtrl   = list(maxfun = 2e8)),
                      data = art, family = binomial)
art_uncert <- glmer(text_isuncertain ~ 1 + volat_own_prev_sd + any_significant + 
                      days_sd + e_proximate + partners + 
                      factor(year) + (1 | poll), 
                    control = glmerControl(optimizer = "bobyqa",
                                           optCtrl   = list(maxfun = 2e8)),
                    data = art, family = binomial)
artmax_uncert <- glmer(text_isuncertain ~ 1 + max_change_sd + any_significant +
                         days_sd + e_proximate + partners + 
                         factor(year) + (1 | poll), 
                       control = glmerControl(optimizer = "bobyqa",
                                              optCtrl   = list(maxfun = 2e8)),
                       data = art, family = binomial)

art_expert <- glmer(expert_quote ~ 1 + volat_own_prev_sd + any_significant + 
                      days_sd + e_proximate + partners + 
                      factor(year) + (1 | poll), 
                    control = glmerControl(optimizer = "bobyqa",
                                           optCtrl   = list(maxfun = 2e8)),
                    data = art, family = binomial)
artmax_expert <- glmer(expert_quote ~ 1 + max_change_sd + any_significant + 
                         days_sd + e_proximate + partners + 
                         factor(year) + (1 | poll), 
                       control = glmerControl(optimizer = "bobyqa",
                                              optCtrl   = list(maxfun = 2e8)),
                       data = art, family = binomial)

# Export in table format
screenreg(list(art_title, art_uncert, art_expert),
          custom.model.names = c("Change in title", "Uncertainty", "Quote"),
          center = TRUE,
          dcolumn = TRUE,
          use.packages = FALSE,
          caption.above = TRUE,
          float.pos = "!h",
          omit.coef = "year",
          custom.coef.names = c("Intercept", "Volatility (2 SD)",
                                "Any significant change (= 1)",
                                "Days since last poll (2 SD)",
                                "Election campaign (= 1)",
                                "Partner (= 1)"),
          include.deviance = FALSE,
          include.bic = FALSE,
          custom.gof.names = c("AIC", "LogLik", "N", "Dyads", "Var(Intercept)"),
          caption = "The relationship between volatility and change coverage, models")

# Create second main results figure
pred_data <- expand.grid(volat_own_prev_sd = seq(min(art$volat_own_prev_sd, na.rm = TRUE),
                                                 max(art$volat_own_prev_sd, na.rm = TRUE),
                                                 0.1),
                         any_significant = 0,
                         e_proximate     = 0,
                         days_sd         = 0,
                         partners        = 0,
                         year = 2013,
                         poll = "")

title_res <- data.frame(cbind(pred_data, predictInterval(art_title,  newdata = pred_data, level = 0.95,
                                                         type = "probability", include.resid.var = FALSE)), what = "Change in title")
uncert_res <- data.frame(cbind(pred_data, predictInterval(art_uncert,  newdata = pred_data, level = 0.95,
                                                          type = "probability", include.resid.var = FALSE)), what = "Uncertainty")
quote_res <- data.frame(cbind(pred_data, predictInterval(art_expert,  newdata = pred_data, level = 0.95,
                                                         type = "probability", include.resid.var = FALSE)), what = "Quote")

all_res <- bind_rows(title_res, uncert_res, quote_res)

emps <- bind_rows(data.frame(fit = art$is_title_change, volat_own_prev_sd = art$volat_own_prev_sd, what = "Change in title"),
                  data.frame(fit = art$text_isuncertain, volat_own_prev_sd = art$volat_own_prev_sd,  what = "Uncertainty"),
                  data.frame(fit = art$expert_quote, volat_own_prev_sd = art$volat_own_prev_sd,  what = "Quote"))

all_res$what <- factor(all_res$what, levels = c("Change in title",
                                                "Uncertainty",
                                                "Quote"))
emps$what <- factor(emps$what, levels = c("Change in title",
                                          "Uncertainty",
                                          "Quote"))
extra_labs <- data.frame(what = c("Change in title",
                                  "Uncertainty",
                                  "Quote",
                                  "Change in title",
                                  "Uncertainty",
                                  "Quote"),
                         fit = c(1, 1, 1, 0, 0 ,0),
                         volat_own_prev_sd = -0.25,
                         vals1 = c(paste0(sum(art$is_title_change == 1), " (",
                                          round(100*sum(art$is_title_change == 1)/nrow(art), 0), "%)"),
                                   paste0(sum(art$text_isuncertain == 1), " (",
                                          round(100*sum(art$text_isuncertain == 1)/nrow(art), 0), "%)"),
                                   paste0(sum(art$expert_quote == 1), " (",
                                          round(100*sum(art$expert_quote == 1)/nrow(art), 0), "%)"),
                                   paste0(sum(art$is_title_change == 0), " (",
                                          round(100*sum(art$is_title_change == 0)/nrow(art), 0), "%)"),
                                   paste0(sum(art$text_isuncertain == 0), " (",
                                          round(100*sum(art$text_isuncertain == 0)/nrow(art), 0), "%)"),
                                   paste0(sum(art$expert_quote == 0), " (",
                                          round(100*sum(art$expert_quote == 0)/nrow(art), 0), "%)")))

content_plot <- ggplot(all_res, aes(x = volat_own_prev_sd, y = fit,
                    ymin = lwr, ymax = upr)) +
  geom_jitter(data = emps, aes(x = volat_own_prev_sd, y = fit), inherit.aes = FALSE,
              alpha = 0.25, width = 0, height = 0.03, colour = "grey80") +
  geom_ribbon(alpha = 0.5, fill = "grey80") + geom_line(size = 1) + facet_wrap(~what) +
  theme_minimal(base_size = 10) +
  theme(panel.background = element_rect(fill = "grey98", colour = NA)) +
  theme(strip.text = element_text(size = 12, face = "bold"),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        axis.title.x = element_text(size = 12, hjust = 0.5),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.major.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.spacing = unit(1.5, "lines"),
        title = element_text(size = 12, face = "bold")) +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  scale_y_continuous("Estimated probability",
                     sec.axis = sec_axis(~.*1)) +
  scale_x_continuous("Volatility", breaks = c(quantile(art$volat_own_prev_sd, 0, na.rm = TRUE),
                                              quantile(art$volat_own_prev_sd, 0.25, na.rm = TRUE),
                                              quantile(art$volat_own_prev_sd, 0.5, na.rm = TRUE),
                                              quantile(art$volat_own_prev_sd, 0.75, na.rm = TRUE),
                                              quantile(art$volat_own_prev_sd, 1, na.rm = TRUE)),
                     labels = c(round(as.numeric(quantile(art$volat_own_prev, 0, na.rm = TRUE)), 1),
                                round(as.numeric(quantile(art$volat_own_prev, 0.25, na.rm = TRUE)), 1),
                                round(as.numeric(quantile(art$volat_own_prev, 0.5, na.rm = TRUE)), 1),
                                round(as.numeric(quantile(art$volat_own_prev, 0.75, na.rm = TRUE)), 1),
                                round(as.numeric(quantile(art$volat_own_prev, 1, na.rm = TRUE)), 1))) +
  geom_label(data = extra_labs, aes(x = volat_own_prev_sd, y = fit, label= vals1),
             inherit.aes = FALSE, size = 3)

content_plot

# Combine previous robustness checks into one data.frame and summary plot
# reported in appendix
rt <- rbind(
  cbind(coef(main_nb)[2], t(confint(main_nb)[2,]), "Article count", "Volatility"),
  cbind(fixef(nb_hierarchical)[2], t(confint(nb_hierarchical, method = "Wald")[3,]), "Article count (hierarchical)", "Volatility"),
  cbind(coef(max_nb)[2], t(confint(max_nb)[2,]), "Article count", "Max change"),
  cbind(fixef(max_nbhlm)[2], t(confint(max_nbhlm, method = "Wald")[3,]), "Article count (hierarchical)", "Max change"),
  
  cbind(fixef(art_title)[2], t(confint(art_title, method = "Wald")[3,]), "Title change", "Volatility"),
  cbind(fixef(art_uncert)[2], t(confint(art_uncert, method = "Wald")[3,]), "Uncertainty", "Volatility"),
  cbind(fixef(art_expert)[2], t(confint(art_expert, method = "Wald")[3,]), "Quote", "Volatility"),
  
  cbind(fixef(artmax_title)[2], t(confint(artmax_title, method = "Wald")[3,]), "Title change", "Max change"),
  cbind(fixef(artmax_uncert)[2], t(confint(artmax_uncert, method = "Wald")[3,]), "Uncertainty", "Max change"),
  cbind(fixef(artmax_expert)[2], t(confint(artmax_expert, method = "Wald")[3,]), "Quote", "Max change"))
rt <- data.frame(rt, stringsAsFactors = FALSE)
rt[, 1:3] <- sapply(rt[, 1:3], as.numeric)

names(rt) <- c("coef", "lwr", "upr", "what", "model")
rt$what <- factor(rt$what)
rt$what <- factor(rt$what, levels = c("Quote", "Uncertainty", "Title change", "Article count (hierarchical)", "Article count"))
rt$broad <- "Amount of\ncoverage"
rt$broad[5:10] <- "Content of\ncoverage"

rt$broad <- factor(rt$broad, levels = c("Amount of\ncoverage", "Content of\ncoverage"))

robust_plot <- ggplot(rt, aes(x = what, y = coef, ymin = lwr, ymax = upr, colour = model, shape = model)) +
  geom_hline(yintercept = 0, alpha = 0.5, linetype = 2) +
  facet_grid(broad~., scales = "free_y", space = "free_y") +
  geom_pointrange(position = position_dodge(width = 0.5)) + coord_flip() +
  theme_minimal(base_size = 10) +
  theme(panel.background = element_rect(fill = "grey98", colour = NA)) +
  theme(strip.text = element_text(size = 12), strip.text.y = element_text(angle = 360),
        axis.text.y = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.title.y = element_text(size = 12),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.y = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.minor.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.grid.major.x = element_line(colour = "grey90", size = 0.2, linetype = 2),
        panel.spacing.y = unit(0.25, "lines"),
        title = element_text(size = 12, face = "bold"), 
        legend.position = c(0.2, 0.95),
        legend.direction = "horizontal") +
  theme(plot.margin = margin(1, 1, 1, 1, "cm")) +
  scale_colour_manual("", values = c("black", "grey70")) +
  scale_shape_manual("", values = c(19, 15), guide = FALSE) +
  labs(
    x = "",
    title = "Alternative to volatility: maximum change",
    y    = "Effect size (logit coefficient)",
    caption  = "",
    subtitle  = "")
robust_plot